# -*- coding: utf-8 -*-
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings
from zc_core.dao.sku_dao import SkuDao
from zc_core.dao.item_pool_dao import ItemPoolDao
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.util.batch_gen import time_to_batch_no
from zc_core.util.http_util import retry_request
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.done_filter import DoneFilter
from zc_core.spiders.base import BaseSpider
from chng.rules import *
from zc_core.client.mongo_client import Mongo


class FullSpider(BaseSpider):
    """Spider that back-fills category data for ``sku_pool`` records.

    It collects the distinct ``catalog3Id`` values of ``sku_pool`` documents
    that have no ``catalog1Id`` field, picks one document per value, and
    requests the category endpoint for that document's ``_id``. Parsed
    results are emitted as ``'catalog'`` boxes.
    """
    name = "fix_catalog"
    # Category lookup endpoint; ``{}`` is filled with the product id.
    item_url = "http://mall.ec.chng.com.cn/scm-hn-oauth-web/obs/business/product/managerView/getProductCategory?productId={}"

    def __init__(self, batchNo=None, delta_day=-2, *args, **kwargs):
        """Initialise the spider and register its batch.

        Args:
            batchNo: Batch number forwarded to ``BaseSpider``.
            delta_day: Day offset forwarded to ``BaseSpider`` (default ``-2``).
            *args: Extra positional arguments forwarded to ``BaseSpider``.
            **kwargs: Extra keyword arguments forwarded to ``BaseSpider``.
        """
        super(FullSpider, self).__init__(batchNo=batchNo, delta_day=delta_day, *args, **kwargs)
        # Create the batch record.
        # NOTE(review): ``self.batch_no`` is presumably set by BaseSpider.__init__ -- confirm.
        BatchDao().create_batch(self.batch_no)

    def start_requests(self):
        """Yield one category request per ``catalog3Id`` that lacks ``catalog1Id``.

        Yields:
            scrapy.Request: Request to ``item_url`` for a representative
            product of each category, handled by ``parse_item_data``.
        """
        # Resolve the collection once instead of on every loop iteration.
        sku_pool = Mongo().get_collection('sku_pool')
        catalog3_ids = sku_pool.distinct("catalog3Id", {"catalog1Id": {"$exists": False}})
        self.logger.info('全量：%s', len(catalog3_ids))

        for distinct_id in catalog3_ids:
            # Any record of the category serves as the sample product.
            sku = sku_pool.find_one({"catalog3Id": distinct_id})
            if sku is None:
                # The record may have disappeared since the distinct query;
                # skip it rather than abort the whole generator.
                self.logger.warning('sku_pool record not found for catalog3Id: %s', distinct_id)
                continue

            sku_id = sku.get("_id")
            catalog3_id = sku.get('catalog3Id')
            yield Request(
                url=self.item_url.format(sku_id),
                headers={
                    "Accept": "application/json, text/plain, */*",
                    "Accept-Encoding": "gzip, deflate",
                    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
                    "Cache-Control": "no-cache",
                    "Connection": "keep-alive",
                    "Host": "mall.ec.chng.com.cn",
                    "Pragma": "no-cache",
                    "Referer": "http://mall.ec.chng.com.cn/item?productId={}".format(sku_id),
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36 Edg/92.0.902.55",
                },
                meta={
                    'reqType': 'catalog',
                    'batchNo': self.batch_no,
                    "skuId": sku_id,
                    "catalog3Id": catalog3_id
                },
                callback=self.parse_item_data,
                # NOTE(review): ``error_back`` is not defined in this class;
                # presumably inherited from BaseSpider -- confirm.
                errback=self.error_back
            )

    def parse_item_data(self, response):
        """Parse a category response and emit it as a ``'catalog'`` box.

        ``fix_catalog`` and ``Box`` are not defined in this file; presumably
        they come from the ``chng.rules`` star import -- confirm.

        Args:
            response: Response to a request built by ``start_requests``.

        Yields:
            Box: ``Box('catalog', batch_no, item)`` when ``fix_catalog``
            returns a truthy result; nothing otherwise.
        """
        item = fix_catalog(response)
        if item:
            self.logger.info('池子补品类:[%s] [%s]', response.meta.get('catalog3Id'), len(item))
            yield Box('catalog', self.batch_no, item)
